{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c529c813",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import os\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fefdd404",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the pickled dataset 'ch1.pkl' from the current working directory\n",
    "file_path = os.path.join(os.getcwd(), 'ch1.pkl')\n",
    "df = pd.read_pickle(file_path)\n",
    "\n",
    "# Plain-text dump of the frame; the trailing expression then shows (rows, columns)\n",
    "print(df)\n",
    "\n",
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9212ced5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parse 'date' into datetime values (the column is overwritten in place)\n",
    "df['date'] = pd.to_datetime(df['date'])\n",
    "\n",
    "# Range covered by the data: smallest and largest value found in 'date'\n",
    "date_column = df['date']\n",
    "earliest_date = date_column.min()\n",
    "latest_date = date_column.max()\n",
    "\n",
    "print(f\"Earliest date: {earliest_date}\")\n",
    "print(f\"Latest date: {latest_date}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2c8dbf0c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summary statistics for the 'textlength' column\n",
    "textlength = df['textlength']\n",
    "\n",
    "# Mean and standard deviation (pandas' .std() defaults to ddof=1, i.e. the sample SD)\n",
    "mean_textlength = textlength.mean()\n",
    "sd_textlength = textlength.std()\n",
    "\n",
    "print(f\"Mean of textlength: {mean_textlength}\")\n",
    "print(f\"Standard Deviation of textlength: {sd_textlength}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "02c81e5b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Re-parse 'date'; this is a no-op when the column is already datetime64\n",
    "df['date'] = pd.to_datetime(df['date'])\n",
    "\n",
    "# One count per distinct 'date' value: the number of rows (episodes) sharing that date.\n",
    "# NOTE(review): dates with no rows are absent from this result, so the statistics below\n",
    "# cover only dates that have at least one episode - confirm this is intended.\n",
    "episodes_by_date = df.groupby('date')\n",
    "daily_episodes = episodes_by_date.size()\n",
    "\n",
    "# Mean and sample standard deviation (ddof=1, the pandas default) of the per-date counts\n",
    "mean_daily_episodes = daily_episodes.mean()\n",
    "sd_daily_episodes = daily_episodes.std()\n",
    "\n",
    "print(f\"Average daily episodes: {mean_daily_episodes}\")\n",
    "print(f\"Standard Deviation of daily episodes: {sd_daily_episodes}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cad738a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Re-parse 'date'; this is a no-op when the column is already datetime64\n",
    "df['date'] = pd.to_datetime(df['date'])\n",
    "\n",
    "# Episodes per calendar day.\n",
    "# - dt.normalize() sets every timestamp to midnight, so all rows of one day share a group.\n",
    "# - asfreq('D', fill_value=0) inserts the days that have no episode with a count of 0.\n",
    "# Without these steps, rolling(window=50) below would average 50 observed dates rather\n",
    "# than 50 calendar days whenever the data has gaps. For gap-free, midnight-stamped data\n",
    "# the result is identical to df.groupby('date').size().\n",
    "daily_episode_counts = (\n",
    "    df.groupby(df['date'].dt.normalize())\n",
    "    .size()\n",
    "    .asfreq('D', fill_value=0)\n",
    ")\n",
    "\n",
    "# 50-day moving average; NaN for the first 49 days, until the window is full\n",
    "moving_average = daily_episode_counts.rolling(window=50).mean()\n",
    "\n",
    "# Set the font to Times New Roman globally (also applies to figures drawn later in this kernel)\n",
    "plt.rcParams['font.family'] = 'Times New Roman'\n",
    "\n",
    "# Draw on an explicit Figure/Axes pair instead of the implicit pyplot state\n",
    "fig, ax = plt.subplots(figsize=(10, 4))\n",
    "ax.plot(daily_episode_counts.index, daily_episode_counts, color='black', label='Daily Episode Count')\n",
    "ax.plot(moving_average.index, moving_average, color='gray', linewidth=2, label='50-Day Moving Average')\n",
    "\n",
    "# Fill the area between zero and the daily counts in black\n",
    "ax.fill_between(daily_episode_counts.index, daily_episode_counts, color='black')\n",
    "\n",
    "# Title, axis labels, legend and tick label size\n",
    "ax.set_title('Daily Episode Counts with 50-Day Moving Average', fontsize=14)\n",
    "ax.set_xlabel('Date', fontsize=12)\n",
    "ax.set_ylabel('Number of Episodes', fontsize=12)\n",
    "ax.legend()\n",
    "ax.tick_params(axis='both', labelsize=10)\n",
    "\n",
    "# Save the figure as F1.jpg in the current working directory\n",
    "current_directory = os.getcwd()\n",
    "save_path = os.path.join(current_directory, \"F1.jpg\")\n",
    "fig.savefig(save_path, dpi=500, bbox_inches='tight')\n",
    "\n",
    "# Display the figure\n",
    "plt.show()\n",
    "\n",
    "# Print confirmation message\n",
    "print(f\"Figure saved at: {save_path}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
